####  Figure S13: Random forest variable importance (demographics) ####

# Libraries
# library(tidyverse)
# library(tidymodels)
# library(rio)
# library(here)
# library(ggthemes)

# source(here("Code","Functions","funcs_theme.R"))

# Load the saved random-forest variable-importance table and drop the rows for
# the two outcomes left out of this figure (violence1re and violence2re).
rf_demos_vip <- here("Output", "randforest_demos_vip.rds") |>
  import() |>
  filter(
    DependentVar != "violence1re",
    DependentVar != "violence2re"
  )

# Build Figure S13: one bar panel per outcome (DependentVar) showing the
# importance score of each demographic predictor, using only the rows where
# ModelSample == "Pooled".
fig_s13 <- rf_demos_vip |>
  filter(ModelSample == "Pooled") |>
  # Placeholder row: it gives the recoded factor below a blank " " level, which
  # is later drawn as an empty panel.
  add_row(DependentVar = "norms_any") |>
  mutate(
    # Human-readable predictor labels. `.default` keeps the raw name of any
    # predictor missing from this table instead of turning it into NA (several
    # NA labels would otherwise share one axis position).
    Variable = case_match(
      Variable,
      "age" ~ "Age",
      "faminc" ~ "Family Income",
      "pid" ~ "Party ID",
      "urban" ~ "Urban/Rural",
      "strong" ~ "PID Strength",
      "white" ~ "Race/Ethnicity",
      "poli_interest" ~ "Interest in Politics",
      "oppo_gov" ~ "Out-Party Governor",
      "male" ~ "Gender",
      "bornagain" ~ "Born Again",
      "college" ~ "College Education",
      "inparty" ~ "In-Party Affect",
      "outparty" ~ "Out-Party Affect",
      .default = Variable
    ),
    # recode_factor() orders the factor levels as the replacements are listed,
    # so this list also fixes the order of the facet panels.
    DependentVar = recode_factor(
      DependentVar,
      norm_pollingre = "Reduce outparty polling stations",
      norm_loyaltyre = "More loyal to party than election\nrules and constitution",
      norm_judgesre = "Ignore outparty court decisions",
      norm_executivere = "President should\ncircumvent Congress",
      norm_censorshipre = "Censor partisan media",
      norms_any = " ", # Blank label for the placeholder level
      violence1re = "Protest without permit",
      violence2re = "Vandalism",
      violence3re = "Assault",
      violence4re = "Arson",
      violence5re = "Assault with deadly weapon",
      violence6re = "Murder"
    )
  ) |>
  # Drop the placeholder row; its " " level stays on the factor and is kept as
  # an empty panel by `drop = FALSE` in facet_wrap() below.
  filter(DependentVar != " ") |>
  ggplot(aes(
    # Order the predictors by importance separately within each panel.
    y = tidytext::reorder_within(Variable, Importance, DependentVar),
    x = Importance
  )) +
  geom_col(position = "dodge") +
  # Strips the suffix that reorder_within() appends to the axis labels.
  tidytext::scale_y_reordered() +
  # drop = FALSE keeps the unused " " level, so the blank panel is drawn.
  # NOTE(review): the blank panel presumably pads the two-column grid so the
  # violence panels start on a new row - confirm against the rendered figure.
  facet_wrap(~DependentVar, scales = "free_y", ncol = 2, drop = FALSE) +
  theme_prl() +
  labs(
    y = NULL,
    caption = "Fit with Random Forest, 1000 trees\nCalculated using bias-corrected Gini impurity scores (Nembrini et al. 2018)"
  ) +
  theme(
    axis.text = element_text(size = 6),
    axis.text.y = element_text(lineheight = 0.55),
    plot.caption = element_text(size = 6),
    strip.text.x = element_text(size = 6),
    panel.spacing.y = unit(0, "lines")
  )

# Display the figure.
print(fig_s13)

# Save the figure as a 5 x 7 inch PNG at 600 dpi, at the path built by here().
ggsave(
  filename = here("Plots", "Supplementary", "figure_s13.png"),
  plot = fig_s13,
  width = 5,
  height = 7,
  units = "in",
  dpi = 600
)